# -*- coding: utf-8 -*-
# !/usr/bin/python
__author__ = 'bitfeng'

import re

from scrapyluke.processors import html_to_dict


# Captures the body of every <table class="detailsList">, up to and including
# its closing </table> tag.
_DETAILS_TABLE_RE = re.compile(
    r'<table[^<>]*?class="detailsList"[^<>]*?>([\s\S]*?</table>)')
# Header cell that carries the "红色为修改过的" notice; it is stripped from
# each table before the section title is looked up.
_MODIFIED_NOTICE_RE = re.compile(
    r'<th[^<>]*?colspan="\d+"[^<>]*?>([\s\S]*?)红色为修改过的([\s\S]*?)</th>')
# Any remaining <th colspan="N"> cell; the first one is used as section title.
_SECTION_TITLE_RE = re.compile(
    r'<th[^<>]*?colspan="\d+"[^<>]*?>([\s\S]*?)</th>')
# Section titles containing one of these markers are parsed with col=True.
# '基本信息' also covers '企业基本信息'.
_COL_SECTION_MARKERS = ('基本信息', '企业资产状况信息')


def format_annualreport(content, name):
    """Split an annual-report HTML page into one parsed entry per section.

    Every ``<table class="detailsList">`` found in ``content`` is treated as
    one section. The section key is the literal '企业基本信息' when that text
    occurs anywhere in the table, otherwise the raw inner HTML of the first
    ``<th colspan="N">`` cell left after the "红色为修改过的" notice header
    has been removed. The key is not stripped or otherwise normalised.

    Tables that expose no such header cell are skipped rather than raising
    ``IndexError``, so one malformed table does not discard the whole report.

    :param content: HTML source of the annual-report page.
    :param name: key under which the parsed report is returned.
    :return: ``{name: {section_key: parsed_section}}`` where each
        ``parsed_section`` is whatever ``html_to_dict`` returns; the mapping
        is empty when no usable ``detailsList`` table is present.
    """
    report = {}
    for table in _DETAILS_TABLE_RE.findall(content):
        table = _MODIFIED_NOTICE_RE.sub('', table)

        if '企业基本信息' in table:
            key = '企业基本信息'
        else:
            titles = _SECTION_TITLE_RE.findall(table)
            if not titles:
                # No header cell to name this section with: skip the table.
                continue
            key = titles[0]

        col = any(marker in key for marker in _COL_SECTION_MARKERS)
        # NOTE(review): the exact meaning of the start/end markers and of
        # ``col`` is defined by scrapyluke's html_to_dict -- confirm there.
        # The raw string keeps the original value (a backslash followed by
        # '</table>') without relying on an invalid escape sequence.
        report[key] = html_to_dict(table, key, r'\</table>', col=col)
    return {name: report}

